Analyzing BANKSY Embedding for Cell Identities

Author

Peter Youyun Zheng

Published

July 30, 2024

Introduction

In this doc, we will look at the BANKSY clusters and try to interpret them based on their cell identity and other characteristics.

Load the data

The objects are here:

  1. RDS object with Leiden Clusters: /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_20240711_135041.rds

    • /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_embeddings/total_spatial_banksy_clusters_subset_20240711_135041.rds
  2. RDS object with cell type cluster markers: /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_markers_20240711_135041.rds

Metadata is here:

  1. /Users/youyun/Documents/HMS/PhD/beroukhimlab/broad_mount/coja/Spatial_PLGG/data/metadata/Xenium_PS.xlsx
Code
# input locations, all relative to `workdir` (defined outside this chunk)
banksy_rds_path = paste0(
    workdir,'coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_subset_20240719_092550.rds'
)
manual_marker_path = paste0(
    workdir,'coja/Spatial_PLGG/data/markers/final_manual_markers.csv'
)
xenium_marker_path = paste0(
    workdir, 'youyun/plgg/data/xenium_selection/Xenium_hBrain_v1_metadata.csv'
)

# BANKSY object whose reduced dims and cluster columns are plotted below;
# sample_id is stored as a factor
banksy_embeddings = readRDS(banksy_rds_path)
colData(banksy_embeddings)[['sample_id']] = factor(colData(banksy_embeddings)[['sample_id']])

# marker tables: the manually curated list and the Xenium hBrain panel metadata
cell_type_markers = fread(manual_marker_path)
xenium_markers = fread(xenium_marker_path)

Looking at the harmony corrected embeddings

We will first look at the extent of batch effect in the cohort by looking at the embeddings before and after harmony correction.

Code
# title strip shown above the two panels
identity_harmony_title = ggdraw() +
    draw_label("BANKSY Embedding UMAP Before/After Harmony (Cell Identity)", fontface='bold')

# left panel: UMAP_M1_lam0.2 coloured by sample; legend hidden because the right panel shows it
identity_umap_uncorrected = plotReducedDim(
    banksy_embeddings, "UMAP_M1_lam0.2",
    color_by = "sample_id", point_size = 0.1, point_alpha = 0.5
) + theme(legend.position = "none")

# right panel: UMAP_Harmony_BANKSY_lam0.2 coloured by sample, with enlarged opaque legend keys
identity_umap_harmony = plotReducedDim(
    banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2",
    color_by = "sample_id", point_size = 0.1, point_alpha = 0.5
) +
    theme(legend.title = element_blank()) +
    guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1)))

# the right panel is slightly wider to leave room for its legend
identity_harmony_panels = plot_grid(
    identity_umap_uncorrected, identity_umap_harmony,
    nrow = 1, rel_widths = c(1, 1.2)
)
plot_grid(
    identity_harmony_title, identity_harmony_panels,
    ncol = 1, rel_heights = c(0.1, 5)
)

Code
# title strip shown above the two panels
niche_harmony_title = ggdraw() +
    draw_label("BANKSY Embedding UMAP Before/After Harmony (Neighborhood)", fontface='bold')

# left panel: UMAP_M1_lam0.8 coloured by sample; legend hidden because the right panel shows it
niche_umap_uncorrected = plotReducedDim(
    banksy_embeddings, "UMAP_M1_lam0.8",
    color_by = "sample_id", point_size = 0.1, point_alpha = 0.5
) + theme(legend.position = "none")

# right panel: UMAP_Harmony_BANKSY_lam0.8 coloured by sample, with enlarged opaque legend keys
niche_umap_harmony = plotReducedDim(
    banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.8",
    color_by = "sample_id", point_size = 0.1, point_alpha = 0.5
) +
    theme(legend.title = element_blank()) +
    guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1)))

# the right panel is slightly wider to leave room for its legend
niche_harmony_panels = plot_grid(
    niche_umap_uncorrected, niche_umap_harmony,
    nrow = 1, rel_widths = c(1, 1.2)
)
plot_grid(
    niche_harmony_title, niche_harmony_panels,
    ncol = 1, rel_heights = c(0.1, 5)
)

What do the cell clusters look like?

Code
# clustering resolution to inspect
res = 1
# name of the cluster column for that resolution; the resolution is printed with one
# decimal and a trailing ".0" is dropped (res = 1 -> "...res1", res = 0.5 -> "...res0.5").
# The dot is escaped so that only a literal ".0" suffix is stripped.
clust_name = sub('\\.0$', '', sprintf('clust_Harmony_BANKSY_lam0.2_k50_res%.1f', res))

We will be looking at resolution = 1 for the clusters using the clust_Harmony_BANKSY_lam0.2_k50_res1 column.

Code
# cluster column for the lam0.8 embedding: same column name with lam0.2 swapped for lam0.8
niche_clust_name = gsub('lam0.2','lam0.8',clust_name)

# legend styling shared by both panels: no title, enlarged opaque keys
cluster_legend_theme = theme(legend.title = element_blank())
cluster_legend_keys = guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1)))

# left panel: lam0.2 UMAP coloured by the lam0.2 clusters
identity_cluster_umap = plotReducedDim(
    banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2",
    color_by = clust_name,
    point_size = 0.1, point_alpha = 0.5
) + cluster_legend_theme + cluster_legend_keys

# right panel: lam0.8 UMAP coloured by the lam0.8 clusters
niche_cluster_umap = plotReducedDim(
    banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.8",
    color_by = niche_clust_name,
    point_size = 0.1, point_alpha = 0.5
) + cluster_legend_theme + cluster_legend_keys

plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP for Cell Identity(L) and Niche(R)", fontface='bold'),
    plot_grid(
        identity_cluster_umap, niche_cluster_umap,
        nrow = 1, rel_widths = c(1, 1)
    ),
    ncol = 1, rel_heights = c(0.1, 5)
)

Just to get an idea of where each cluster is, we can look at the UMAPs colored individually by the clusters.

Code
# cluster labels at the chosen resolution and the cell IDs they belong to
cluster_labels = colData(banksy_embeddings)[,clust_name]
cell_ids = rownames(colData(banksy_embeddings))
# go through as.character() so a factor column yields its labels rather than its
# internal level codes; the 'cell_in_<label>' columns below are built from the labels
cell_clusters = sort(unique(as.numeric(as.character(cluster_labels))))

# one hot encoded columns: one 'cell_in_<cluster>' count column per cluster
# (0/1 when cell IDs are unique), with rows put back into colData order after dcast
one_hot_encoded_clusters = data.frame(dcast(data.table(
    cell_sample_id = cell_ids,
    clusters = paste0('cell_in_',cluster_labels)
), cell_sample_id ~ clusters, length)[order(match(cell_sample_id,cell_ids))])

# cbind into the colData, but only if the table being bound is aligned row-for-row
# with colData; a cell_sample_id column already in colData must agree as well
existing_ids = colData(banksy_embeddings)$cell_sample_id
ids_aligned = identical(as.character(one_hot_encoded_clusters$cell_sample_id), cell_ids) &&
    (is.null(existing_ids) || all(cell_ids == existing_ids))
if(ids_aligned){
    colData(banksy_embeddings) = cbind(
        colData(banksy_embeddings),
        one_hot_encoded_clusters
    )
}else{
    stop('Cell IDs do not match')
}

# make the plots: one UMAP per cluster, coloured by that cluster's one-hot column
a = lapply(cell_clusters, function(x){
    plotReducedDim(
        banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2",
        point_size = 0.1, point_alpha = 0.5,
        color_by = paste0('cell_in_',x)
    )  + theme(
        legend.position = "none",
        axis.text=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank()
    ) + labs(title = paste0('Cluster ',x))
})
# keep the per-cluster plots available as global 'cell_type_plot_<cluster>' objects
cluster_plots = setNames(a, paste0('cell_type_plot_',cell_clusters))
invisible(list2env(cluster_plots, envir = .GlobalEnv))

plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP by Cluster", fontface='bold'),
    plot_grid(
        plotlist = cluster_plots,
        ncol = 6,
        rel_widths = rep(1,length(cell_clusters))
    ), ncol = 1,rel_heights = c(0.1, 5)
)

What are the cell types in the clusters?

Manually- and Xenium- annotated cell type markers

Code
# long table of (annotation, marker gene): manual markers with category 'Cell Type' and
# xenium == TRUE, stacked on the Xenium panel annotation; the suffix records the source
markers_to_score = rbind(cell_type_markers[
    category == 'Cell Type' & xenium == TRUE,
    .(markers = marker),.(annotation = paste0(annotation, '_manual'))
],xenium_markers[
    ,.(markers = Genes),.(annotation = paste0(Annotation, '_xenium'))
])[
    ,annotation := make.names(annotation)
]
# keep only signatures with at least two marker rows
markers_to_score = markers_to_score[annotation %in% markers_to_score[,.N, .(annotation)][N>=2]$annotation]

# Scoring the cells by cell type markers:
# score = log10(mean 'normcounts' over the signature's genes + 1), one column per signature
cell_types = unique(markers_to_score$annotation)
marker_score = do.call('cbind',lapply(cell_types, function(x){
    genes_of_interest = markers_to_score[annotation == x]$markers
    cell_scores = log10(colSums(
        assay(banksy_embeddings[genes_of_interest,], 'normcounts')
    )/length(genes_of_interest) + 1)
    cell_scores_df = data.frame(cell_scores)
    rownames(cell_scores_df) = names(cell_scores)
    colnames(cell_scores_df) = x
    cell_scores_df
}))
# drop score columns left over from a previous run before binding the new ones
colData(banksy_embeddings)[,cell_types] = NULL
# bind only when the score rows line up with colData; compare against marker_score
# itself (comparing colData's rownames with themselves would always pass)
if(identical(rownames(colData(banksy_embeddings)), rownames(marker_score))){
    colData(banksy_embeddings) = cbind(colData(banksy_embeddings),marker_score)
}else{
    stop('Cell IDs do not match')
}

# make the plots: one UMAP per signature, stored as global 'plot_<signature>'
# objects because the grid chunks below fetch them by name
a = lapply(cell_types, function(x){
    assign(
        paste0('plot_',x),
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2", 
            point_size = 0.1, point_alpha = 0.5, 
            color_by = x
        ) +
            theme(legend.title = element_blank(), legend.position = 'bottom') +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1), nrow = 1)) +
            labs(title = x),
        envir = .GlobalEnv
    )
})

We are going to annotate 34 cell types, and we will now visualize the marker scores.

Code
# signatures from the manually curated list; their UMAPs are looked up by name
# as 'plot_<signature>' objects in the calling environment
manual_cell_types = grep('_manual',cell_types, value = TRUE)
manual_marker_panels = plot_grid(
    plotlist = mget(paste0('plot_',manual_cell_types)),
    ncol = 4,
    rel_widths = rep(1,length(manual_cell_types))
)
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP by Manually Curated Markers", fontface='bold'),
    manual_marker_panels,
    ncol = 1, rel_heights = c(0.1, 5)
)

Code
# signatures from the Xenium panel annotation; their UMAPs are looked up by name
# as 'plot_<signature>' objects in the calling environment
xenium_cell_types = grep('_xenium',cell_types, value = TRUE)
xenium_marker_panels = plot_grid(
    plotlist = mget(paste0('plot_',xenium_cell_types)),
    ncol = 5,
    rel_widths = rep(1,length(xenium_cell_types))
)
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP by Xenium Markers", fontface='bold'),
    xenium_marker_panels,
    ncol = 1, rel_heights = c(0.1, 5)
)

We should complement the manual approach with more statistics

Looking at these ‘metagenes’ statistically

A better way to do this is to test statistically which clusters are enriched for which signature. We will do that here.

Code
# signature x cell score matrix and the cell metadata are identical for every cluster,
# so they are built once rather than inside the loop
score_matrix = as.matrix(t(colData(banksy_embeddings)[,cell_types]))
cell_meta = colData(banksy_embeddings)

# for each cluster: one-sided Wilcoxon test of every signature score (cluster vs all
# other cells) plus the difference in mean score between the two groups
cell_type_cluster_sig = do.call('rbind',lapply(paste0('cell_in_',cell_clusters), function(x){
    in_cluster = cell_meta[[x]]
    scran_cell_markers = findMarkers(
        score_matrix,
        groups = in_cluster,
        test.type="wilcox", direction="up"
    )
    # mean score inside the cluster minus mean score outside it, per signature
    delta_dt = data.table(
        cell_type = cell_types,
        delta_score = vapply(cell_types, function(ct){
            score = cell_meta[[ct]]
            mean(score[in_cluster == 1]) - mean(score[in_cluster == 0])
        }, numeric(1), USE.NAMES = FALSE)
    )
    # results for the in-cluster group, looked up by its name ("1") rather than by
    # position so the choice does not depend on how the groups are ordered
    cluster_cell_type = setDT(
        data.frame(scran_cell_markers[['1']]),
        keep.rownames = 'cell_type'
    )[,.(cell_type = make.names(cell_type), Top, p.value, FDR, cluster = x)]
    merge(
        cluster_cell_type, delta_dt, by = 'cell_type'
    )
}))

# make a hierarchically clustered heatmap
# wide cluster x signature tables: '*' where FDR < 0.05, and the delta score
cell_type_cluster_sig_matrix = dcast(
    cell_type_cluster_sig[,.(cell_type, cluster, FDR = ifelse(FDR<0.05,'*',''))],
    cluster ~ cell_type, value.var = 'FDR'
)
cell_type_cluster_delta_matrix = dcast(
    cell_type_cluster_sig[,.(cell_type, cluster, delta_score)],
    cluster ~ cell_type, value.var = 'delta_score'
)
# drop the leading 'cluster' column and use it as row names instead
plot_sig_mt = as.matrix(cell_type_cluster_sig_matrix[,c(2:ncol(cell_type_cluster_sig_matrix)), with = FALSE])
plot_delta_mt = as.matrix(cell_type_cluster_delta_matrix[,c(2:ncol(cell_type_cluster_delta_matrix)), with = FALSE])
rownames(plot_sig_mt) = cell_type_cluster_sig_matrix$cluster
rownames(plot_delta_mt) = cell_type_cluster_delta_matrix$cluster
# colour scale clamps at +/- 0.3; each cell is overlaid with its significance mark
col_fun = colorRamp2(c(-0.3,0,0.3), c("red", "white", "green"))
Heatmap(
    plot_delta_mt, name = "Delta Mean Norm Counts", col = col_fun,
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid.text(sprintf("%s", plot_sig_mt[i, j]), x, y, gp = gpar(fontsize = 10))
})

Looking at the DE Genes in each cluster

Code
# cells per cluster, one facet per sample, on a log10 y axis.
# Only the two columns that are plotted are converted to a plain data.frame,
# since colData() returns an S4 DataFrame rather than a data.frame.
cluster_count_df = as.data.frame(colData(banksy_embeddings)[,c(clust_name, 'sample_id')])
ggplot(cluster_count_df) + 
    geom_bar(aes(
        x = .data[[clust_name]],
        # as.character() first so a factor column is coloured by label, not by level code
        fill = as.numeric(as.character(.data[[clust_name]]))
    )) + 
    # `scales` spelled out in full instead of relying on partial argument matching
    facet_wrap(~sample_id, nrow = 3, scales = 'free_y') +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        text = element_text(size = 15)
    ) + labs(
        x = 'Cluster ID', y = 'Number of Cells (log10)'
    ) + scale_fill_viridis_c(option = 'D') +
    guides(fill = guide_legend(title = 'Cluster ID')) +
    scale_y_log10()

Code
# convert to Seurat (raw counts only), log-normalise, and use the cluster column as identity
banksy_embeddings_seurat = NormalizeData(
    as.Seurat(banksy_embeddings, counts = 'counts', data = NULL),
    normalization.method = "LogNormalize",
    scale.factor = 10000
)
Idents(banksy_embeddings_seurat) = clust_name

# positive Wilcoxon markers for every cluster
# NOTE(review): the test reads slot = 'counts', so the LogNormalize result above does
# not appear to feed into it — confirm this is intended
wilcox_markers = data.table(FindAllMarkers(
    banksy_embeddings_seurat,
    slot = 'counts',
    test.use = 'wilcox',
    only.pos = TRUE,
    verbose = FALSE
))

# attach the Xenium panel annotation; merge() defaults to an inner join, so genes
# missing from xenium_markers are dropped
xenium_gene_annotation = xenium_markers[,.(gene = Genes, Annotation)]
DE_genes_wilcox = merge(wilcox_markers, xenium_gene_annotation, by = 'gene')

# per cluster: number of markers with adjusted p <= 0.05 and log2FC >= 1, listed as
# "GENE (log2FC, annotation)" in order of increasing adjusted p-value
significant_markers = DE_genes_wilcox[order(p_val_adj)][
    p_val_adj <= 0.05 & avg_log2FC >= 1
]
marker_summary = significant_markers[
    , .(
        N = .N,
        `Markers` = paste0(
            gene, ' (',
            round(avg_log2FC, 2),  ', ',
            Annotation,
            ')', collapse = ', '
        )
    ),
    .(cluster)
]
kable(marker_summary[order(cluster)])
cluster N Markers
1 14 ABCC9 (1.33, VLMC), B4GALNT1 (1.32, Glioblastoma (Cancer cells)), CNTN2 (1.43, Oligodendrocyte), FGFR3 (1.02, Astrocyte), GAD2 (1.03, Lamp5 Lhx6), IGFBP5 (1.1, Glioblastoma (Cancer cells)), NR4A2 (1.79, Glioblastoma (TME)), PLD5 (1.27, Pax6), RELN (1.47, Pax6), SDK1 (1.8, Chandelier), SPHKAP (1.31, Lamp5 Lhx6), TRAC (1.14, Glioblastoma (TME)), TRPC6 (1.39, Sst Chodl), UNC5B (1.14, Chandelier)
2 12 FBLN1 (1.69, VLMC), NNAT (3.09, Glioblastoma (Cancer cells)), SOX11 (2.69, Glioblastoma (Cancer cells)), CUX2 (2.18, L2/3 IT), HS3ST4 (2.34, L6 CT), LRRK2 (2.47, OPC), NOTCH1 (1.58, Endothelial), RASGRP1 (1, L6 IT Car3), FSTL4 (1.55, L4 IT), SNCG (2.11, L5 ET), NRP1 (1.02, Endothelial), NPY1R (1.03, L6 IT Car3)
3 16 BRINP3 (2.36, OPC), CENPF (1.93, Proliferation), LAMP5 (2.31, Lamp5 Lhx6), MEIS2 (1.88, Astrocyte), NDST4 (3.37, Sst Chodl), NTNG2 (1.44, L6 IT Car3), PAX6 (2.02, Astrocyte), PLCH1 (1.74, Sst), RFTN1 (1.83, VLMC), RORB (1.82, L4 IT), TENM1 (2.8, Chandelier), ANGPT1 (1.5, Pax6), PDGFD (1.03, Lamp5 Lhx6), ARHGAP24 (1.24, Microglia-PVM), IDH1 (1.26, Glioblastoma (Cancer cells)), CDH4 (1.13, Pax6)
4 40 AIF1 (2.68, Glioblastoma (TME)), APOE (1.01, Microglia-PVM), C1orf162 (2.1, Glioblastoma (TME)), CD14 (2.94, Glioblastoma (TME)), CD163 (4.06, Glioblastoma (TME)), CD4 (2.37, Glioblastoma (TME)), CD68 (2.7, Glioblastoma (TME)), CD86 (2.17, Microglia-PVM), CORO1A (2.25, Glioblastoma (TME)), CTSH (1.58, Microglia-PVM), CTSS (2.68, Glioblastoma (TME)), CX3CR1 (3.09, Glioblastoma (TME)), FCER1G (2.85, Glioblastoma (TME)), FCGR1A (2.4, Glioblastoma (TME)), FCGR3A (3.1, Glioblastoma (TME)), GPR34 (3.18, Glioblastoma (TME)), HLA-DMB (2.6, Glioblastoma (TME)), HLA-DQA1 (2.67, Microglia-PVM), ITGAM (2.16, Microglia-PVM), ITGAX (2.89, Microglia-PVM), ITGB2 (3.22, Glioblastoma (TME)), LRRK1 (1.65, Microglia-PVM), LY86 (1.77, Glioblastoma (TME)), LYVE1 (2.98, Microglia-PVM), MS4A6A (3.31, Glioblastoma (TME)), P2RY12 (2.83, Microglia-PVM), P2RY13 (2.52, Microglia-PVM), PTPRC (2.34, Microglia-PVM), RGS10 (2.44, Glioblastoma (TME)), RNASET2 (2.4, Glioblastoma (TME)), SPI1 (2.79, Microglia-PVM), STXBP2 (1.31, Glioblastoma (TME)), TGFB1 (2.13, Microglia-PVM), TGFBI (2.98, Glioblastoma (TME)), TMIGD3 (2.64, Glioblastoma (TME)), TREM2 (2.81, Microglia-PVM), CXCR4 (1.58, Glioblastoma (TME)), GPR183 (1.21, Glioblastoma (TME)), CAPG (1.1, Glioblastoma (TME)), GPNMB (1.1, Glioblastoma (TME))
5 33 APOE (1.55, Microglia-PVM), BCAN (3.5, Glioblastoma (Cancer cells)), BRINP3 (1.24, OPC), CALCRL (1.31, OPC), CAV1 (1.23, Glioblastoma (Cancer cells)), CSPG4 (1.91, VLMC), ERBB3 (2.67, Oligodendrocyte), GAD1 (3.57, Pvalb), GPNMB (3.22, Glioblastoma (TME)), IGFBP3 (1.15, Glioblastoma (Cancer cells)), IGFBP4 (1.06, VLMC), ITGA8 (1.23, OPC), MOG (1.71, Oligodendrocyte), NES (2.07, Astrocyte), NTNG1 (2.67, Lamp5 Lhx6), OLIG1 (3.59, OPC), OLIG2 (3.75, OPC), PCSK1 (1.24, L5 ET), PDGFRA (4.26, OPC), PLCE1 (1.61, VLMC), POSTN (3.85, L6 IT Car3), PTCHD4 (1.26, Lamp5), PTPRZ1 (2.75, OPC), RIT2 (1.06, L5 ET), SEMA5A (3.21, OPC), SERPINA3 (2.38, VLMC), SORCS1 (3.21, L5 ET), SOX10 (4.26, OPC), STK32B (3.47, OPC), TMEM132C (2.04, VLMC), TTYH1 (1.05, Glioblastoma (Cancer cells)), UGT8 (3.04, Oligodendrocyte), VCAN (4.01, OPC)
6 33 ANXA1 (1.92, Glioblastoma (Cancer cells)), CALCRL (3.47, OPC), CEMIP (3.69, VLMC), CEMIP2 (2.26, Endothelial), DCN (4.03, VLMC), FLT1 (4.14, Endothelial), IFITM3 (2.71, Microglia-PVM), IGFBP3 (3.52, Glioblastoma (Cancer cells)), IGFBP4 (3.98, VLMC), KLF2 (3.04, Glioblastoma (TME)), KLF4 (2.5, Glioblastoma (TME)), NES (1.8, Astrocyte), PECAM1 (4.15, Endothelial), PHLDB2 (2.28, VLMC), RNF144B (2.72, Endothelial), TGFBI (2.13, Glioblastoma (TME)), TGFB1 (1.78, Microglia-PVM), THSD4 (1.83, Endothelial), CAV1 (1.65, Glioblastoma (Cancer cells)), NRP1 (1.23, Endothelial), CDH6 (2.02, VLMC), HES1 (1.83, Glioblastoma (Cancer cells)), NOTCH1 (1.01, Endothelial), CD4 (1.46, Glioblastoma (TME)), LAMA2 (1.21, VLMC), THBS1 (2.18, Astrocyte), LYVE1 (1.24, Microglia-PVM), COL12A1 (1.33, VLMC), DDR2 (1.05, Pax6), SLIT3 (2.06, L5 ET), FILIP1 (1.22, L6 CT), ITGA8 (1.36, OPC), SNTB2 (1.08, L6 IT)
7 30 ADRA1A (1.32, Sncg), ALK (1.47, Chandelier), APOE (2.06, Microglia-PVM), AQP4 (2.66, Astrocyte), BCAN (1.54, Glioblastoma (Cancer cells)), COL12A1 (2.38, VLMC), CRYM (1.45, L5/6 NP), EGFR (1.23, Glioblastoma (Cancer cells)), ELOVL2 (1.87, Glioblastoma (Cancer cells)), GJA1 (4.04, Astrocyte), HES1 (1.1, Glioblastoma (Cancer cells)), HHATL (1.26, Oligodendrocyte), IDH2 (1.01, Glioblastoma (Cancer cells)), NPNT (1.42, Chandelier), OLIG1 (1.91, OPC), OLIG2 (1.65, OPC), PTCHD4 (1.33, Lamp5), PTPRZ1 (2.29, OPC), RYR3 (3.1, Astrocyte), SEMA5A (1.66, OPC), SLC17A7 (2.2, L6b), SPON1 (3.73, Astrocyte), TGFB2 (1.96, Astrocyte), TRIL (3.5, Glioblastoma (Cancer cells)), TTYH1 (3.74, Glioblastoma (Cancer cells)), TSHZ2 (1.12, L5/6 NP), CCK (1.68, Pax6), PLCE1 (1.22, VLMC), LYPD6 (1.07, Lamp5 Lhx6), EYA4 (1.05, Lamp5 Lhx6)
8 20 ANGPT1 (1.26, Pax6), BRINP3 (1.51, OPC), CHODL (2.86, Glioblastoma (Cancer cells)), CXCL14 (4.29, Pax6), CYTIP (1.8, Glioblastoma (TME)), GAD2 (1.42, Lamp5 Lhx6), LAMP5 (1.41, Lamp5 Lhx6), MEIS2 (1.1, Astrocyte), NDST4 (1.96, Sst Chodl), PAX6 (1.53, Astrocyte), RASGRP1 (1.84, L6 IT Car3), RGS16 (1.17, Microglia-PVM), SYNPR (3.38, Sst), TENM1 (1.47, Chandelier), VWC2L (2.49, L5/6 NP), MGST1 (1.15, Glioblastoma (Cancer cells)), NPY1R (1.16, L6 IT Car3), POU6F2 (1.52, L4 IT), FILIP1 (1.07, L6 CT), MAL (1.21, Oligodendrocyte)
9 19 CCL5 (5.65, Glioblastoma (TME)), CD2 (4.48, Glioblastoma (TME)), CD3G (4.2, Glioblastoma (TME)), CD48 (3.58, Glioblastoma (TME)), CD52 (5.08, Glioblastoma (TME)), GZMA (5.06, Glioblastoma (TME)), IL7R (4.04, Glioblastoma (TME)), NKG7 (4.79, Glioblastoma (TME)), KLRB1 (3.32, Glioblastoma (TME)), TRAC (2.34, Glioblastoma (TME)), THEMIS (2.63, L6 IT Car3), S100A4 (2.7, Glioblastoma (TME)), CXCR4 (1.95, Glioblastoma (TME)), CYTIP (1.48, Glioblastoma (TME)), TESPA1 (1.83, L2/3 IT), PTPRC (1.6, Microglia-PVM), GNLY (2.15, Glioblastoma (TME)), CORO1A (1.94, Glioblastoma (TME)), IDO1 (1.19, Glioblastoma (TME))
10 16 ANXA1 (2.36, Glioblastoma (Cancer cells)), APOE (2.48, Microglia-PVM), AQP4 (1.96, Astrocyte), BCAN (1.22, Glioblastoma (Cancer cells)), GJA1 (1.17, Astrocyte), IFITM3 (1.29, Microglia-PVM), MGST1 (2.65, Glioblastoma (Cancer cells)), NES (1.27, Astrocyte), SERPINA3 (3.69, VLMC), SFRP2 (2.38, Pax6), TTYH1 (1.45, Glioblastoma (Cancer cells)), VCAN (1.01, OPC), CTSH (1.74, Microglia-PVM), SPON1 (1.02, Astrocyte), THBS1 (1.38, Astrocyte), POSTN (1.37, L6 IT Car3)
11 57 KCNH5 (2.74, L6 CT), MYO5B (2.1, Pvalb), WIF1 (2.61, Pax6), ZBBX (2.87, L2/3 IT), MCTP2 (2.52, VLMC), ANKRD18A (2.59, Sst Chodl), ADAMTS12 (2.47, VLMC), IDO1 (2.56, Glioblastoma (TME)), VIP (2.47, Vip), FASLG (2.69, Microglia-PVM), OTOGL (2.6, L4 IT), NPFFR2 (2.91, L6b), LOX (2.2, Glioblastoma (Cancer cells)), MEPE (2.12, Pvalb), ANO3 (2.36, VLMC), CAV1 (2.26, Glioblastoma (Cancer cells)), TESPA1 (2.32, L2/3 IT), LYPD6B (2.15, Sncg), TPH2 (2.67, L5/6 NP), SLC17A6 (2.11, L4 IT), ADAMTS3 (2.38, L6 IT), CTNNA3 (2.44, Oligodendrocyte), ROS1 (2.13, L6b), RSPO2 (2.09, Pvalb), THEMIS (1.89, L6 IT Car3), CRHBP (2.02, Sst Chodl), CHODL (1.73, Glioblastoma (Cancer cells)), NWD2 (2.42, L6 IT Car3), POU6F2 (2.27, L4 IT), TRPC5 (1.84, L6 IT Car3), LHX6 (1.93, Chandelier), CDH12 (1.77, L6 IT Car3), SLC26A4 (1.71, L6 IT Car3), SAMD5 (2.32, Pvalb), CNTNAP3B (1.65, Chandelier), ADRA1B (2.24, Sncg), KLRB1 (1.67, Glioblastoma (TME)), NPY1R (1.61, L6 IT Car3), CD36 (1.38, L5/6 NP), OPALIN (1.71, Oligodendrocyte), ST18 (1.52, Oligodendrocyte), NXPH2 (1.55, Sst Chodl), GNLY (2.06, Glioblastoma (TME)), EYA4 (1.8, Lamp5 Lhx6), MOBP (1.3, Oligodendrocyte), HTR2A (1.67, L5 IT), NTNG1 (1.15, Lamp5 Lhx6), HTR2C (1.67, L5/6 NP), THSD4 (1.51, Endothelial), ANK1 (1.65, Chandelier), FILIP1 (1.67, L6 CT), PHLDB2 (1.49, VLMC), FSTL4 (1.47, L4 IT), SFRP2 (1.68, Pax6), VWC2L (1.4, L5/6 NP), IL7R (1.13, Glioblastoma (TME)), ADAMTS16 (2.01, L4 IT)
12 12 B4GALNT1 (1.06, Glioblastoma (Cancer cells)), CCNB2 (3.04, Proliferation), CDK1 (2.43, Proliferation), MKI67 (2.54, Proliferation), TOP2A (2.34, Proliferation), TRPC6 (1.06, Sst Chodl), NR4A2 (1.17, Glioblastoma (TME)), GAS2L3 (2.46, L6 IT Car3), CENPF (1.76, Proliferation), SDK1 (1.5, Chandelier), KLK6 (1.22, Oligodendrocyte), CCNA1 (1.21, Proliferation)
13 16 CCL4 (5.55, Microglia-PVM), CD83 (4.72, Glioblastoma (TME)), GPR183 (3, Glioblastoma (TME)), NR4A2 (1.43, Glioblastoma (TME)), ITGAX (1.68, Microglia-PVM), POSTN (1.59, L6 IT Car3), CD86 (1.73, Microglia-PVM), GPR34 (1.05, Glioblastoma (TME)), TREM2 (1.78, Microglia-PVM), FCGR1A (1.11, Glioblastoma (TME)), AIF1 (1.02, Glioblastoma (TME)), SPI1 (1.33, Microglia-PVM), CD68 (1.02, Glioblastoma (TME)), CD163 (1.29, Glioblastoma (TME)), TMIGD3 (1.08, Glioblastoma (TME)), CD14 (1.03, Glioblastoma (TME))
14 39 ADAMTS16 (3.38, L4 IT), ANO3 (2.4, VLMC), AQP4 (1.07, Astrocyte), C1QL3 (5.41, L6 IT), CCK (5.23, Pax6), CRYM (5.19, L5/6 NP), GJA1 (1.85, Astrocyte), HS3ST2 (4.22, L6 CT), PTPRZ1 (1.17, OPC), SLC17A7 (5.65, L6b), SLIT3 (4.24, L5 ET), TRIL (1.52, Glioblastoma (Cancer cells)), TSHZ2 (3.25, L5/6 NP), TTYH1 (1.71, Glioblastoma (Cancer cells)), OLIG1 (1.07, OPC), SLC17A6 (2.03, L4 IT), RXFP1 (2.87, L5 IT), SPON1 (1.44, Astrocyte), HTR2A (1.65, L5 IT), CDH12 (1.97, L6 IT Car3), ADRA1B (2.15, Sncg), PCSK1 (2.09, L5 ET), SNCG (1.64, L5 ET), COL12A1 (1.38, VLMC), RSPO2 (1.49, Pvalb), RYR3 (1.06, Astrocyte), CORO1A (1.27, Glioblastoma (TME)), CCNA1 (1.52, Proliferation), FSTL4 (1.4, L4 IT), SYNPR (1.39, Sst), TRHDE (1.98, Sst), ZDHHC23 (1.22, L6 IT Car3), NWD2 (1.41, L6 IT Car3), SORCS1 (1.05, L5 ET), RIT2 (1.2, L5 ET), HTR2C (1.35, L5/6 NP), ST18 (1.96, Oligodendrocyte), SAMD5 (1.03, Pvalb), ANK1 (1.27, Chandelier)
15 4 KCNH5 (3.38, L6 CT), NPNT (2.52, Chandelier), RORB (1.95, L4 IT), APP (1.05, Broad)
16 8 CCNB2 (4.24, Proliferation), CDK1 (4.09, Proliferation), CENPF (3.02, Proliferation), MKI67 (5.76, Proliferation), TOP2A (5.99, Proliferation), GAS2L3 (3.48, L6 IT Car3), PCNA (1, Proliferation), CCNA1 (2.07, Proliferation)
17 15 CNDP1 (6.11, Oligodendrocyte), ERMN (4.86, Oligodendrocyte), MAG (6.12, Oligodendrocyte), MOBP (3.89, Oligodendrocyte), MOG (4.91, Oligodendrocyte), CAPN3 (3.42, Oligodendrocyte), OPALIN (3.19, Oligodendrocyte), UGT8 (3.06, Oligodendrocyte), CLDN11 (2.48, Oligodendrocyte), MAL (2.66, Oligodendrocyte), ST18 (2.28, Oligodendrocyte), KLK6 (2.73, Oligodendrocyte), MYRF (2.3, Oligodendrocyte), PCSK6 (1.59, Oligodendrocyte), HHATL (1.29, Oligodendrocyte)
18 48 B4GALNT1 (1.28, Glioblastoma (Cancer cells)), MYO5B (1.7, Pvalb), NTNG2 (1.04, L6 IT Car3), FGFR2 (1, Oligodendrocyte), NR4A2 (1.01, Glioblastoma (TME)), TACR1 (2.16, Sst Chodl), SLC26A4 (2.2, L6 IT Car3), NXPH2 (2.16, Sst Chodl), LYPD6B (2.21, Sncg), SPHKAP (1.05, Lamp5 Lhx6), MEPE (1.75, Pvalb), CDH1 (2.02, Oligodendrocyte), MCTP2 (1.38, VLMC), CDH12 (1.69, L6 IT Car3), HHATL (1.58, Oligodendrocyte), KLK6 (1.95, Oligodendrocyte), WIF1 (1.38, Pax6), ANK1 (1.47, Chandelier), ANKRD18A (1.3, Sst Chodl), LHX6 (1.45, Chandelier), NWD2 (1.79, L6 IT Car3), ADRA1A (1.14, Sncg), TRPC5 (1.68, L6 IT Car3), SDK1 (1.61, Chandelier), IDO1 (1.37, Glioblastoma (TME)), SFRP2 (1.49, Pax6), MYRF (1.57, Oligodendrocyte), TPH2 (1.88, L5/6 NP), ST18 (1.36, Oligodendrocyte), ROS1 (1.69, L6b), LOX (1.3, Glioblastoma (Cancer cells)), PHLDB2 (1.1, VLMC), ZDHHC23 (1.86, L6 IT Car3), THSD7B (1.29, Sncg), FASLG (1.37, Microglia-PVM), THSD4 (1.17, Endothelial), ATP2C2 (1.77, L5/6 NP), FSTL4 (1.16, L4 IT), CTNNA3 (1.36, Oligodendrocyte), PCSK6 (1.03, Oligodendrocyte), TESPA1 (1.36, L2/3 IT), HTR2C (1.62, L5/6 NP), ADAMTS16 (1.58, L4 IT), CSPG4 (1.31, VLMC), NPFFR2 (1.23, L6b), RXFP1 (1.37, L5 IT), ZBBX (1.06, L2/3 IT), RIT2 (1.14, L5 ET)
19 18 SST (6.81, Sst Chodl), TAC1 (6.51, Sst Chodl), PVALB (5.38, Chandelier), CRHBP (3.95, Sst Chodl), LHX6 (3.62, Chandelier), GAD1 (3.23, Pvalb), ANK1 (1.79, Chandelier), ALK (2.52, Chandelier), TRHDE (2.68, Sst), SYNPR (1.31, Sst), BTBD11 (1.22, Pvalb), SNCG (2.31, L5 ET), PCSK6 (1.22, Oligodendrocyte), TRPC5 (1.75, L6 IT Car3), NXPH2 (1.62, Sst Chodl), RSPO2 (1.35, Pvalb), KIT (1.89, Proliferation), SAMD5 (1.36, Pvalb)
20 47 THBS1 (5.73, Astrocyte), SLC17A6 (3.97, L4 IT), PCSK1 (4.39, L5 ET), ALK (4.82, Chandelier), GAD1 (4.07, Pvalb), ANK1 (2.65, Chandelier), CALCRL (3.42, OPC), PLCE1 (3.68, VLMC), CORO1A (2.57, Glioblastoma (TME)), NPY1R (2.25, L6 IT Car3), CNTNAP3B (2.3, Chandelier), RIT2 (2.23, L5 ET), ADRA1A (2, Sncg), SAMD5 (2.43, Pvalb), CCNA1 (2.85, Proliferation), LYPD6 (1.9, Lamp5 Lhx6), CX3CR1 (1.75, Glioblastoma (TME)), NES (1.92, Astrocyte), CDH4 (1.75, Pax6), OLIG2 (1.28, OPC), IGFBP3 (2.06, Glioblastoma (Cancer cells)), PTPRZ1 (1.54, OPC), NTNG1 (2.11, Lamp5 Lhx6), TPH2 (1.46, L5/6 NP), AIF1 (1.61, Glioblastoma (TME)), SERPINA3 (1.15, VLMC), CD83 (1.17, Glioblastoma (TME)), FCGR3A (1.49, Glioblastoma (TME)), STAT3 (1.21, Endothelial), SNTB2 (1.38, L6 IT), ANXA1 (1.01, Glioblastoma (Cancer cells)), BCAN (1.28, Glioblastoma (Cancer cells)), P2RY12 (1.02, Microglia-PVM), HLA-DQA1 (1.17, Microglia-PVM), TREM2 (1.73, Microglia-PVM), SOX10 (1.34, OPC), TTYH1 (1.1, Glioblastoma (Cancer cells)), PSEN2 (1.08, Glioblastoma (Cancer cells)), HLA-DMB (1.15, Glioblastoma (TME)), ZDHHC23 (1.18, L6 IT Car3), SORCS1 (1.85, L5 ET), PTCHD4 (1.09, Lamp5), STK32B (1.95, OPC), LRRK1 (1.2, Microglia-PVM), LY86 (1.09, Glioblastoma (TME)), CCK (1.49, Pax6), SNCG (2.48, L5 ET)
21 50 GPNMB (5.89, Glioblastoma (TME)), CD68 (2.88, Glioblastoma (TME)), TREM2 (3.13, Microglia-PVM), CTSS (3.09, Glioblastoma (TME)), PDGFRA (2.99, OPC), HLA-DMB (2.53, Glioblastoma (TME)), THBS1 (3.48, Astrocyte), MS4A6A (2.26, Glioblastoma (TME)), ANXA1 (2.68, Glioblastoma (Cancer cells)), APOE (2.21, Microglia-PVM), CD163 (2.43, Glioblastoma (TME)), CD48 (3.93, Glioblastoma (TME)), CEMIP (4.17, VLMC), CTSH (2.44, Microglia-PVM), FCER1G (1.71, Glioblastoma (TME)), HS3ST2 (3.7, L6 CT), CD4 (2.6, Glioblastoma (TME)), GPR34 (2.3, Glioblastoma (TME)), SOX10 (2.59, OPC), ITGAM (2.35, Microglia-PVM), AIF1 (2.22, Glioblastoma (TME)), VCAN (2.15, OPC), ITGB2 (2.96, Glioblastoma (TME)), IFITM3 (1.98, Microglia-PVM), CXCR4 (2.23, Glioblastoma (TME)), CAPG (2.84, Glioblastoma (TME)), ERBB3 (2.07, Oligodendrocyte), CD14 (2.47, Glioblastoma (TME)), IGFBP4 (2.84, VLMC), P2RY13 (1.94, Microglia-PVM), CD86 (1.48, Microglia-PVM), UGT8 (1.35, Oligodendrocyte), SORCS1 (2.87, L5 ET), SEMA5A (1.61, OPC), PLCE1 (1.02, VLMC), GAS2L3 (1.6, L6 IT Car3), LYVE1 (1.81, Microglia-PVM), NES (1.32, Astrocyte), POSTN (1.18, L6 IT Car3), DCN (2.24, VLMC), OLIG2 (1.52, OPC), HLA-DQA1 (2.32, Microglia-PVM), LY86 (1.09, Glioblastoma (TME)), PTPRC (1.82, Microglia-PVM), SERPINA3 (1.23, VLMC), CDH6 (2.09, VLMC), GPR183 (2.84, Glioblastoma (TME)), P2RY12 (1.93, Microglia-PVM), TGFBI (2.4, Glioblastoma (TME)), CCL4 (2.34, Microglia-PVM)

Preliminary annotation

With the information above, we can begin to piece together a preliminary definition for each cluster.

This Cell Paper and this Nature Paper

Code
# manual labels per cluster: c(<coarse cell type>, <fine cell type annotation>)
# NOTE(review): the DE marker table in this document lists a cluster 21, which has no
# entry here — confirm whether it should be annotated
cluster_labels = list(
    `1` = c('Neuron','GABAergic Neuron (lamp5, pax6)'),
    `2` = c('Neuron','Glutamatergic Neuron (GBM CC Xenium)'),
    `12` = c('Neuron','GABAergic Neuron (Proliferating)'),
    `15` = c('Neuron','Glutamatergic Neuron'),
    `18` = c('Neuron','Unknown Neuron (GABAergic and Glutamatergic)'),
    `17` = c('Oligodendrocytes','Oligodendrocytes'),
    `16` = c('Proliferation','Proliferation'),
    `9` = c('Lymphoid','Lymphoid'),
    `11` = c('Neuron','Unknown Neuron (GABAergic and Glutamatergic)'),
    `19` = c('Neuron','GABAergic Neuron (sst, sst/chodl, pvalb, chandelier)'),
    `3` = c('RGC','RGC'),
    `8` = c('RGC','RGC'),
    `14` = c('Neuron','Glutamatergic Neuron (l6b)'),
    `20` = c('Undetermined','Undetermined'),
    `4` = c('Myeloid','Myeloid'),
    `6` = c('Stromal','Endothelial/VLMC'),
    `13` = c('Myeloid','Myeloid'),
    `5` = c('OPC','OPC'),
    `7` = c('Astrocyte','Astrocyte'),
    `10` = c('Astrocyte','Astrocyte (VLMC, myeloid)')
)

# one row per cluster, in the order listed above
prelim_annotation_res1 = data.table(
    cluster = names(cluster_labels),
    `Coarse Cell Type` = vapply(
        cluster_labels, function(lbl) lbl[1], character(1), USE.NAMES = FALSE
    ),
    `Fine Cell Type Annotation` = vapply(
        cluster_labels, function(lbl) lbl[2], character(1), USE.NAMES = FALSE
    )
)

# UMAP label = coarse type plus a running index within that type, counted in
# increasing cluster number
prelim_annotation_res1[
    order(as.numeric(cluster)),
    `Coarse Cell Type UMAP` := paste0(
        `Coarse Cell Type`, ' ', seq_len(.N)
    ),.(`Coarse Cell Type`)
]
# fine type = annotation with any trailing ' (...)' detail removed
prelim_annotation_res1[,`Fine Cell Type` := gsub(
    ' \\(.*','', `Fine Cell Type Annotation`
)]
# same running index, this time within each fine type
prelim_annotation_res1[
    order(as.numeric(cluster)),
    `Fine Cell Type UMAP` := paste0(
        `Fine Cell Type`, ' ', seq_len(.N)
    ),.(`Fine Cell Type`)
]

annotation_column_order = c(
    'cluster','Coarse Cell Type','Coarse Cell Type UMAP',
    'Fine Cell Type','Fine Cell Type UMAP',
    'Fine Cell Type Annotation'
)
setcolorder(prelim_annotation_res1, annotation_column_order)

# show the table in the report and save it as a tab-separated file
kable(prelim_annotation_res1)
annotation_tsv_path = paste0(
    workdir,'coja/Spatial_PLGG/data/Xenium/banksy/banksy_cell_cluster_annotation.tsv'
)
write.table(
    prelim_annotation_res1,
    file = annotation_tsv_path,
    sep = '\t', quote = FALSE, row.names = FALSE
)
cluster Coarse Cell Type Coarse Cell Type UMAP Fine Cell Type Fine Cell Type UMAP Fine Cell Type Annotation
1 Neuron Neuron 1 GABAergic Neuron GABAergic Neuron 1 GABAergic Neuron (lamp5, pax6)
2 Neuron Neuron 2 Glutamatergic Neuron Glutamatergic Neuron 1 Glutamatergic Neuron (GBM CC Xenium)
12 Neuron Neuron 4 GABAergic Neuron GABAergic Neuron 2 GABAergic Neuron (Proliferating)
15 Neuron Neuron 6 Glutamatergic Neuron Glutamatergic Neuron 3 Glutamatergic Neuron
18 Neuron Neuron 7 Unknown Neuron Unknown Neuron 2 Unknown Neuron (GABAergic and Glutamatergic)
17 Oligodendrocytes Oligodendrocytes 1 Oligodendrocytes Oligodendrocytes 1 Oligodendrocytes
16 Proliferation Proliferation 1 Proliferation Proliferation 1 Proliferation
9 Lymphoid Lymphoid 1 Lymphoid Lymphoid 1 Lymphoid
11 Neuron Neuron 3 Unknown Neuron Unknown Neuron 1 Unknown Neuron (GABAergic and Glutamatergic)
19 Neuron Neuron 8 GABAergic Neuron GABAergic Neuron 3 GABAergic Neuron (sst, sst/chodl, pvalb, chandelier)
3 RGC RGC 1 RGC RGC 1 RGC
8 RGC RGC 2 RGC RGC 2 RGC
14 Neuron Neuron 5 Glutamatergic Neuron Glutamatergic Neuron 2 Glutamatergic Neuron (l6b)
20 Undetermined Undetermined 1 Undetermined Undetermined 1 Undetermined
4 Myeloid Myeloid 1 Myeloid Myeloid 1 Myeloid
6 Stromal Stromal 1 Endothelial/VLMC Endothelial/VLMC 1 Endothelial/VLMC
13 Myeloid Myeloid 2 Myeloid Myeloid 2 Myeloid
5 OPC OPC 1 OPC OPC 1 OPC
7 Astrocyte Astrocyte 1 Astrocyte Astrocyte 1 Astrocyte
10 Astrocyte Astrocyte 2 Astrocyte Astrocyte 2 Astrocyte (VLMC, myeloid)